"""Pipeline trigger for git repos."""
import argparse
import copy
import datetime
import json
import os
import random
import re
import subprocess
import typing

from cki_lib import cki_pipeline
from cki_lib import config_tree
from cki_lib import gitlab
from cki_lib import misc
from cki_lib import yaml
from cki_lib.logger import get_logger
import dateutil
import sentry_sdk

# Module-level logger shared by every function in this tool.
LOGGER = get_logger('cki_tools.gitrepo_trigger')


def get_commit_hash(repository, git_ref):
    """Return commit hash for the ref in question.

    The reference needs to include the specifier, e.g. use 'refs/heads/main'
    instead of just 'main'; to avoid collisions.

    Args:
        repository: Git repository URL.
        git_ref:    Git reference to get commit hash for.

    Returns:
        String representing the commit hash or None if an error occurred.
    """
    LOGGER.debug('Getting the last commit from %s@%s', git_ref, repository)
    try:
        lines = subprocess.check_output(
            ['git', 'ls-remote', repository],
            timeout=60
        ).decode('utf-8').splitlines()
    # pylint: disable=broad-except
    except Exception:
        LOGGER.exception('Unable to list remote %s', repository)
        return None
    # Each output line is '<hash>\t<ref>'. Compare the ref field exactly
    # instead of using line.endswith(git_ref): branch names may contain
    # slashes, so a suffix match could hit an unrelated longer ref (e.g.
    # 'refs/heads/foo/refs/heads/main' ends with 'refs/heads/main').
    for line in lines:
        fields = line.split()
        if len(fields) >= 2 and fields[1] == git_ref:
            return fields[0]
    LOGGER.warning('Ref %s not found in remote %s', git_ref, repository)
    return None


def last(gl_project, cki_pipeline_branch, variable_filter, **kwargs):
    """Return the last pipeline in branch@project matching the filters."""
    # The variable filter values are treated as regexes downstream, so
    # escape them; None values are passed through untouched.
    escaped_filter = {}
    for key, value in variable_filter.items():
        escaped_filter[key] = None if value is None else re.escape(value)
    # about 5 pipelines/second, so 1000 pipelines take ~3 minutes before giving up
    pipelines = cki_pipeline.last_pipelines_for_branch(
        gl_project, cki_pipeline_branch, max_count=1000,
        variable_filter=escaped_filter, **kwargs)
    return iter(pipelines)


def was_tested(gl_project, cki_pipeline_branch, commit_hash, variable_filter):
    """Return whether the pipeline exists and tested the specified commit."""
    gl_pipeline = next(last(gl_project, cki_pipeline_branch, variable_filter), None)
    if not gl_pipeline:
        # No matching pipeline at all.
        return gl_pipeline
    if gitlab.get_variables(gl_pipeline).get('watched_repo_commit_hash') != commit_hash:
        # Latest matching pipeline ran against a different commit.
        return False
    # Only count the pipeline as "tested" when it has at least one job.
    return gl_pipeline.jobs.list(per_page=1, all=False)


def too_soon(gl_project, cki_pipeline_branch, minimum_interval, concurrency_filter):
    """Return whether an old pipeline exists that is not yet old enough."""
    if not minimum_interval:
        # No interval configured: never throttle.
        return minimum_interval
    gl_pipeline = next(last(gl_project, cki_pipeline_branch, concurrency_filter), None)
    if not gl_pipeline:
        # Nothing triggered before, so it cannot be too soon.
        return gl_pipeline
    earliest_allowed = (dateutil.parser.parse(gl_pipeline.created_at)
                        + misc.parse_timedelta(minimum_interval))
    return earliest_allowed > datetime.datetime.now(tz=datetime.timezone.utc)


def too_many(gl_project, cki_pipeline_branch, concurrency_limit, concurrency_filter):
    """Return whether too many running pipelines exist."""
    if not concurrency_limit:
        # No limit configured: never throttle.
        return concurrency_limit
    running = last(gl_project, cki_pipeline_branch, concurrency_filter,
                   pipeline_count=concurrency_limit, list_filter={'scope': 'running'})
    # Count the running pipelines without materializing the whole list.
    return sum(1 for _ in running) >= concurrency_limit


def get_triggers(gl_instance, config):
    """Determine the trigger variables.

    Args:
        gl_instance: GitLab instance object the pipelines run on.
        config:      Parsed trigger configuration tree.

    Yields:
        Cleaned trigger variable dicts, one per configured branch that
        needs a new pipeline.
    """
    configs = list(config_tree.process_config_tree(config).values())
    random.shuffle(configs)  # for concurrency groups, randomly give every config a chance
    for value in configs:
        for branch in value['.branches']:
            trigger = copy.deepcopy(value)
            gl_project = cki_pipeline.pipeline_project(gl_instance, trigger)

            trigger['commit_hash'] = get_commit_hash(trigger['git_url'], f'refs/heads/{branch}')
            if trigger['commit_hash'] is None:
                continue

            trigger['branch'] = branch
            if report_rules := trigger.get('.report_rules'):
                trigger['report_rules'] = json.dumps(report_rules)

            # Check the URL and branch separately - it's possible to have the same
            # branch name in different repos people use, e.g. version based naming
            trigger.setdefault('watch_url', trigger['git_url'])
            trigger.setdefault('watch_branch', trigger['branch'])
            trigger['watched_repo_commit_hash'] = get_commit_hash(
                trigger['watch_url'], f'refs/heads/{trigger["watch_branch"]}'
            )
            # Mirror the commit_hash check above: with a None watched hash,
            # was_tested() below could never match and the pipeline would be
            # re-triggered on every run.
            if trigger['watched_repo_commit_hash'] is None:
                continue

            variable_filter = {
                'watch_url': trigger['watch_url'],
                'watch_branch': trigger['watch_branch'],
                'package_name': trigger.get('package_name'),
            }
            # Explicit concurrency groups share throttling across configs;
            # otherwise throttle per watched repo/branch/package.
            if concurrency_group := trigger.get('concurrency_group'):
                concurrency_filter = {'concurrency_group': concurrency_group}
            else:
                concurrency_filter = variable_filter

            if was_tested(gl_project, trigger['cki_pipeline_branch'],
                          trigger['watched_repo_commit_hash'], variable_filter):
                LOGGER.info('Pipeline for %s (%s) already triggered. Watched %s@%s',
                            trigger['branch'], trigger['commit_hash'],
                            trigger['watch_branch'], trigger['watch_url'])
                continue

            if too_soon(gl_project, trigger['cki_pipeline_branch'],
                        trigger.get('minimum_interval'), concurrency_filter):
                LOGGER.info('Pipelines for %s (%s) spawning too fast: less than %s ago',
                            trigger['branch'], trigger['commit_hash'], trigger['minimum_interval'])
                continue

            if too_many(gl_project, trigger['cki_pipeline_branch'],
                        trigger.get('concurrency_limit'), concurrency_filter):
                LOGGER.info('Already %s running pipelines for %s (%s)',
                            trigger['concurrency_limit'], trigger['branch'], trigger['commit_hash'])
                continue

            # Human-readable pipeline title; only mention non-default packages.
            title = ['Baseline', trigger['git_url'], trigger['branch'], trigger['commit_hash'][:12]]
            if (package_name := trigger.get('package_name', 'kernel')) != 'kernel':
                title.append(package_name)
            trigger['title'] = ' - '.join(title)
            yield config_tree.clean_config(trigger)


def main(args: typing.Optional[typing.List[str]] = None) -> None:
    """Run it."""
    parser = argparse.ArgumentParser()
    # Each CLI flag falls back to the corresponding environment variable.
    for flag, env_var, help_text in (
        ('--gitlab-url', 'GITLAB_URL', 'GitLab URL'),
        ('--config', 'GITREPO_TRIGGER_CONFIG', 'YAML configuration file to use'),
        ('--config-path', 'GITREPO_TRIGGER_CONFIG_PATH',
         'Path to YAML configuration file'),
    ):
        parser.add_argument(flag, default=os.environ.get(env_var), help=help_text)
    parsed_args = parser.parse_args(args)

    config = yaml.load(
        contents=parsed_args.config,
        file_path=parsed_args.config_path,
        resolve_references=True,
    )

    gl_instance = gitlab.get_instance(parsed_args.gitlab_url)
    # Trigger one pipeline at a time so a failure does not abort the rest.
    for trigger in get_triggers(gl_instance, config):
        cki_pipeline.trigger_multiple(gl_instance, [trigger])


if __name__ == '__main__':
    # Set up Sentry error reporting before doing any real work.
    misc.sentry_init(sentry_sdk)
    main()
